Assignment 13



In [165]:

    
# Import packages
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.mplot3d import Axes3D
import scipy.stats as ss
import pandas as pd
import numpy as np
import seaborn as sns
from scipy.optimize import curve_fit
from pylab import *
import itertools
from lmfit import Model

np.random.seed(12345678)  # for reproducibility, set random seed

# Read in data
df = pd.read_csv('../output.csv')

nvox = 64*64*48 # assume number of voxels per bin
df['weighted'] = df['synapses']/df['unmasked']*nvox

xvals = df['cx'].unique()
yvals = df['cy'].unique()
zvals = df['cz'].unique()

# Get rid of the blank edges
bottom = 0;
top = len(xvals);
right = 0;
left = len(yvals);
for z in zvals:
    this_z = df[df['cz']==z]
    
    # X direction
    xhist, bin_edges = np.histogram(this_z['cx'], weights = this_z['unmasked']/(nvox*len(yvals)), bins=len(xvals))
    
    bottom = max(bottom, np.argmax(xhist>0.5))
    top = min(top, len(xvals)-np.argmax(xhist[::-1]>0.5))
    
    # Y direction
    yhist, bin_edges = np.histogram(this_z['cy'], weights = this_z['unmasked']/(nvox*len(xvals)), bins=len(yvals))
    
    right = max(right, np.argmax(yhist>0.5))
    left = min(left, len(yvals)-np.argmax(yhist[::-1]>0.5))

# Copy new dataset without edges
df2 = df.copy()
for z in zvals:
    df2.drop(df2.index[(df2['cx']<xvals[bottom]) | (df2['cx']>=xvals[top])], inplace=True)
    df2.drop(df2.index[(df2['cy']<yvals[right]) | (df2['cy']>=yvals[left])], inplace=True)

xvals = df2['cx'].unique()
yvals = df2['cy'].unique()
zvals = df2['cz'].unique()

df2.head()

Fit 2D data to X-Y coordinate sums



In [13]:

    
dfxy = pd.pivot_table(df2, index='cy', columns='cx', values='weighted', aggfunc=np.mean)

df3 = dfxy.unstack().rename('weighted').reset_index()
df3.head()



In [21]:

    
from numpy.polynomial import polynomial as P

def polyfit2d(x, y, f, deg):
    x = np.asarray(x)
    y = np.asarray(y)
    f = np.asarray(f)
    deg = np.asarray(deg)
    vander = P.polyvander2d(x, y, deg)
    vander = vander.reshape((-1,vander.shape[-1]))
    f = f.reshape((vander.shape[0],))
    c = np.linalg.lstsq(vander, f)[0]
    return c.reshape(deg+1)

x_point = np.array(df3['cx'])
y_point = np.array(df3['cy'])
synapses = np.array(df3['weighted'])

for deg in range(5):
    popt = polyfit2d(x_point, y_point, synapses, deg=[deg, deg])
    chi_squared = np.sum((P.polyval2d(x_point, y_point, popt) - synapses) ** 2)
    reduced_chi_squared = chi_squared / (len(x_point) + len(y_point) - len(popt))
    print 'Testing polynomial model of degree =', deg
    print 'The degrees of freedom for this test is', len(x_point) + len(y_point) - len(popt)
    print 'The chi squared value is: ', ("%.2f" % chi_squared)
    print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
    print 

popt = polyfit2d(np.log(x_point), np.log(y_point), synapses, deg=[1, 1])
chi_squared = np.sum((P.polyval2d(np.log(x_point), np.log(y_point), popt) - synapses) ** 2)
reduced_chi_squared = chi_squared / (len(x_point) + len(y_point) - len(popt))
print 'Testing logarithmic model'
print 'The degrees of freedom for this test is', len(x_point) + len(y_point) - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print 

popt = polyfit2d(np.log(x_point), np.log(y_point), np.log(synapses), deg=[1, 1])
chi_squared = np.sum((np.exp(P.polyval2d(np.log(x_point), np.log(y_point), popt)) - synapses) ** 2)
reduced_chi_squared = chi_squared / (len(x_point) + len(y_point) - len(popt))
print 'Testing powerlaw model'
print 'The degrees of freedom for this test is', len(x_point) + len(y_point) - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print 

print '2D polynomial of degree = 2 gives the best fit'









    



Testing polynomial model of degree = 0
The degrees of freedom for this test is 6383
The chi squared value is:  4103057.80
The reduced chi squared value is:  642.81

Testing polynomial model of degree = 1
The degrees of freedom for this test is 6382
The chi squared value is:  3051500.33
The reduced chi squared value is:  478.14

Testing polynomial model of degree = 2
The degrees of freedom for this test is 6381
The chi squared value is:  2621617.98
The reduced chi squared value is:  410.85

Testing polynomial model of degree = 3
The degrees of freedom for this test is 6380
The chi squared value is:  2910471.17
The reduced chi squared value is:  456.19

Testing polynomial model of degree = 4
The degrees of freedom for this test is 6379
The chi squared value is:  10483687.82
The reduced chi squared value is:  1643.47

Testing logarithmic model
The degrees of freedom for this test is 6382
The chi squared value is:  3194949.04
The reduced chi squared value is:  500.62

Testing powerlaw model
The degrees of freedom for this test is 6382
The chi squared value is:  3235132.18
The reduced chi squared value is:  506.92

2D polynomial of degree = 2 gives the best fit

Plot the fitted and actual number of synapses distribution



In [23]:

    
popt = polyfit2d(x_point, y_point, synapses, deg=[2, 2])
pred = P.polyval2d(x_point, y_point, popt)
df3['pred'] = pred

sns.set_style('white')

fs = 18
tfs = 14

ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
patches = ax.scatter(xs = df3['cx'], ys = df3['cy'], zs = df3['weighted'], 
                     s = df3['weighted']/10, c = df3['weighted'], cmap = 'winter',
                     edgecolors = [0,0,0,0])

plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(df2['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(df2['weighted'].unique()[::3])
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Mean Number of Synapses', fontsize = fs, labelpad=20)
plt.title("Distribution of Synapses in X-Y plane", fontsize=fs)
cbar = plt.colorbar(patches)
cbar.ax.tick_params(labelsize=tfs) 
cbar.ax.set_ylabel('Mean Number of Synapses', fontsize=fs)
plt.tight_layout()

plt.savefig('./figs/weighted_synapses/XY_means.png', format='png', dpi=300)
ax.view_init(azim = 35)
plt.savefig('./figs/weighted_synapses/XY_means_azim_35.png', format='png', dpi=300)

plt.show()
    
X, Y = np.meshgrid(xvals, yvals)
Z = pd.pivot_table(df3, index='cy', columns='cx', values='pred')

ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
patches = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap = 'winter',
                          linewidth=0, antialiased=False)

plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(df2['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(df2['weighted'].unique()[::3])
# ax.view_init(azim = 35)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title("Fitted Distribution of Synapses in X-Y plane", fontsize=fs)
cbar = plt.colorbar(patches)
cbar.ax.tick_params(labelsize=tfs) 
cbar.ax.set_ylabel('Mean Number of Synapses', fontsize=fs)

plt.tight_layout()

plt.savefig('./figs/weighted_synapses/XY_quadratic_fit.png', format='png', dpi=300)
ax.view_init(azim = 35)
plt.savefig('./figs/weighted_synapses/XY_quadratic_fit_azim_35.png', format='png', dpi=300)

plt.show()

ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
patches = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap = 'winter',
                          linewidth=0, antialiased=False)
patches = ax.scatter(xs = df3['cx'], ys = df3['cy'], zs = df3['weighted'], 
                     s = df3['weighted']/10, c = df3['weighted'], cmap = 'winter',
                     edgecolors = [0,0,0,0])

plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(df2['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(df2['weighted'].unique()[::3])
# ax.view_init(azim = 35)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title("Actual and Fitted Distribution of Synapses in X-Y plane", fontsize=fs)
cbar = plt.colorbar(patches)
cbar.ax.tick_params(labelsize=tfs) 
cbar.ax.set_ylabel('Mean Number of Synapses', fontsize=fs)

plt.tight_layout()

plt.savefig('./figs/weighted_synapses/XY_actual_fit.png', format='png', dpi=300)
ax.view_init(azim = 35)
plt.savefig('./figs/weighted_synapses/XY_actual_fit_azim_35.png', format='png', dpi=300)

plt.show()



In [27]:

    
residual = abs(pred - df3['weighted'])

sns.set_style('white')

fs = 18
tfs = 14

ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
patches = ax.scatter(xs = df3['cx'], ys = df3['cy'], zs = residual, 
                     s = residual, c = residual, cmap = 'winter',
                     edgecolors = [0,0,0,0])

plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(dfout['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(dfout['weighted'].unique()[::3])
ax.view_init(azim = 45)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title("Difference of Actual and Fitted Number of Synapses", fontsize=fs)
plt.colorbar(patches)
plt.tight_layout()
plt.show()



In [60]:

    
df3['resid'] = df3['pred'] - df3['weighted']

sns.set_style('white')

bottom = df3['resid'].min()
top = df3['resid'].max()

delta = 2*(xvals[1]-xvals[0])
extra = delta / (np.sqrt(3)/2)
left = np.min(xvals) - extra
right = np.max(xvals) + extra

with sns.plotting_context('notebook', font_scale=1.5):
    g = sns.jointplot(x='cx', y='resid', data = df3, kind='hex',
                      joint_kws={'gridsize':len(xvals)/2+1, 'extent':(left, right, bottom, top)},
                      marginal_kws={'bins':len(xvals)/2})
#     g = g.plot_marginals(sns.distplot, kde=True)
    g = g.plot_joint(sns.regplot, scatter=False, color='pink', line_kws={'linewidth': 4})
    sns.axlabel('X (um)', 'Residuals', fontsize=fs)
    plt.gcf().axes[1].set_title('Residuals viewed along X')
    plt.savefig('./figs/weighted_synapses/XY_quadratic_fit_X_resid.png', format='png', dpi=300)
    plt.show()
    
delta = 2*(yvals[1]-yvals[0])
extra = delta / (np.sqrt(3)/2)
left = np.min(yvals) - extra
right = np.max(yvals) + extra

with sns.plotting_context('notebook', font_scale=1.5):
    g = sns.jointplot(x='cy', y='resid', data = df3, kind='hex',
                      joint_kws={'gridsize':len(yvals)/2+1, 'extent':(left, right, bottom, top)},
                      marginal_kws={'bins':len(yvals)/2})
    g = g.plot_joint(sns.regplot, scatter=False, color='pink', line_kws={'linewidth': 4})
    sns.axlabel('Y (um)', 'Residuals', fontsize=fs)
    plt.gcf().axes[1].set_title('Residuals viewed along Y')
    plt.savefig('./figs/weighted_synapses/XY_quadratic_fit_Y_resid.png', format='png', dpi=300)
    plt.show()



In [71]:

    
sns.set_style('darkgrid')

plt.figure()
sns.distplot(df3['resid'])
plt.xlabel('Residual', fontsize=fs)
plt.ylabel('Probability', fontsize=fs)
plt.yticks(np.linspace(0,0.016,5))
plt.tick_params(labelsize=tfs)
plt.title('Distribution of Residuals', fontsize=fs)
plt.tight_layout()
plt.savefig('./figs/weighted_synapses/XY_quadratic_fit_resid.png', format='png', dpi=300)
plt.show()

Aligning Y-layers by means



In [6]:

    
mean = df2['weighted'].mean()
ymeans = [df2[df2['cy']==y]['weighted'].mean() for y in yvals]

df4 = df2.copy()
for y, ymean in zip(yvals, ymeans):
    df4.loc[df4['cy']==y, 'weighted'] = df4.loc[df4['cy']==y, 'weighted'] - (ymean - mean)

df4.head()



In [7]:

    
fs = 18

bottom = df4['weighted'].min()
top = df4['weighted'].max()

sns.set_style('white')

avals_list = [xvals, yvals, zvals]
anames = ['cx', 'cy', 'cz']
alabels = ['X (um)', 'Y (um)', 'Z (um)']
ascales = [2, 2, 1]

for avals, aname, alabel, ascale in zip(avals_list, anames, alabels, ascales):
    delta = 2*(avals[1]-avals[0])
    extra = delta / (np.sqrt(3)/2)
    left = np.min(avals) - extra
    right = np.max(avals) + extra
    
    with sns.plotting_context('notebook', font_scale=1.5):
        g = sns.jointplot(x=aname, y='weighted', data = df4, kind='hex',
                          joint_kws={'gridsize':len(avals)/ascale+1, 'extent':(left, right, bottom, top)},
                          marginal_kws={'bins':len(avals)/ascale})
        g = g.plot_joint(sns.regplot, scatter=False, color='pink', line_kws={'linewidth': 4})
        sns.axlabel(alabel, 'Weighted synapses', fontsize=fs)

Fit 2D data to Y-aligned data



In [8]:

    
dfxy = pd.pivot_table(df4, index='cy', columns='cx', values='weighted', aggfunc=np.sum)

df5 = dfxy.unstack().rename('weighted').reset_index()
df5.head()









    Out[8]:






  
    
      
      cx
      cy
      weighted
    
  
  
    
      0
      448
      1369
      2588.785248
    
    
      1
      448
      1408
      2343.006049
    
    
      2
      448
      1447
      2759.237726
    
    
      3
      448
      1486
      2519.679015
    
    
      4
      448
      1525
      2931.327981

Test different 2D models



In [9]:

    
from numpy.polynomial import polynomial as P

def polyfit2d(x, y, f, deg):
    x = np.asarray(x)
    y = np.asarray(y)
    f = np.asarray(f)
    deg = np.asarray(deg)
    vander = P.polyvander2d(x, y, deg)
    vander = vander.reshape((-1,vander.shape[-1]))
    f = f.reshape((vander.shape[0],))
    c = np.linalg.lstsq(vander, f)[0]
    return c.reshape(deg+1)

x_point = np.array(df5['cx'])
y_point = np.array(df5['cy'])
synapses = np.array(df5['weighted'])

for deg in range(5):
    popt = polyfit2d(x_point, y_point, synapses, deg=[deg, deg])
    chi_squared = np.sum((P.polyval2d(x_point, y_point, popt) - synapses) ** 2)
    reduced_chi_squared = chi_squared / (len(x_point) + len(y_point) - len(popt))
    print 'Testing polynomial model of degree =', deg
    print 'The degrees of freedom for this test is', len(x_point) + len(y_point) - len(popt)
    print 'The chi squared value is: ', ("%.2f" % chi_squared)
    print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
    print 

popt = polyfit2d(np.log(x_point), np.log(y_point), synapses, deg=[1, 1])
chi_squared = np.sum((P.polyval2d(np.log(x_point), np.log(y_point), popt) - synapses) ** 2)
reduced_chi_squared = chi_squared / (len(x_point) + len(y_point) - len(popt))
print 'Testing logarithmic model'
print 'The degrees of freedom for this test is', len(x_point) + len(y_point) - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print 

popt = polyfit2d(np.log(x_point), np.log(y_point), np.log(synapses), deg=[1, 1])
chi_squared = np.sum((np.exp(P.polyval2d(np.log(x_point), np.log(y_point), popt)) - synapses) ** 2)
reduced_chi_squared = chi_squared / (len(x_point) + len(y_point) - len(popt))
print 'Testing powerlaw model'
print 'The degrees of freedom for this test is', len(x_point) + len(y_point) - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print 

print '2D polynomial of degree = 2 gives the best fit'









    



Testing polynomial model of degree = 0
The degrees of freedom for this test is 6383
The chi squared value is:  359140617.31
The reduced chi squared value is:  56265.18

Testing polynomial model of degree = 1
The degrees of freedom for this test is 6382
The chi squared value is:  341398566.23
The reduced chi squared value is:  53493.98

Testing polynomial model of degree = 2
The degrees of freedom for this test is 6381
The chi squared value is:  304238990.09
The reduced chi squared value is:  47678.89

Testing polynomial model of degree = 3
The degrees of freedom for this test is 6380
The chi squared value is:  339413567.81
The reduced chi squared value is:  53199.62

Testing polynomial model of degree = 4
The degrees of freedom for this test is 6379
The chi squared value is:  1107177349.69
The reduced chi squared value is:  173565.97

Testing logarithmic model
The degrees of freedom for this test is 6382
The chi squared value is:  350037567.44
The reduced chi squared value is:  54847.63

Testing powerlaw model
The degrees of freedom for this test is 6382
The chi squared value is:  351360422.51
The reduced chi squared value is:  55054.91

2D polynomial of degree = 2 gives the best fit

Plot the fitted and actual number of synapses distribution



In [10]:

    
popt = polyfit2d(x_point, y_point, synapses, deg=[2, 2])
pred = P.polyval2d(x_point, y_point, popt)
df5['pred'] = pred

sns.set_style('white')

fs = 18
tfs = 14

ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
patches = ax.scatter(xs = df5['cx'], ys = df5['cy'], zs = df5['weighted'], 
                     s = df5['weighted']/100, c = df5['weighted'], cmap = 'winter',
                     edgecolors = [0,0,0,0])

plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(df2['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(df2['weighted'].unique()[::3])
ax.view_init(azim = 35)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title("Actual Distribution of Synapses in X-Y plane", fontsize=fs)
plt.colorbar(patches)
plt.tight_layout()
plt.show()
    
X, Y = np.meshgrid(xvals, yvals)
Z = pd.pivot_table(df5, index='cy', columns='cx', values='pred')

ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
patches = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap = 'winter',
                          linewidth=0, antialiased=False)

plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(df2['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(df2['weighted'].unique()[::3])
ax.view_init(azim = 35)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title("Fitted Distribution of Synapses in X-Y plane", fontsize=fs)
plt.colorbar(patches)
plt.tight_layout()
plt.show()

ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
patches = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap = 'winter',
                          linewidth=0, antialiased=False)
patches = ax.scatter(xs = df5['cx'], ys = df5['cy'], zs = df5['weighted'], 
                     s = df5['weighted']/100, c = df5['weighted'], cmap = 'winter',
                     edgecolors = [0,0,0,0])

plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(df2['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(df2['weighted'].unique()[::3])
ax.view_init(azim = 35)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title("Actual and Fitted Distribution of Synapses in X-Y plane", fontsize=fs)
plt.colorbar(patches)
plt.tight_layout()
plt.show()



In [11]:

    
residual = abs(pred - df5['weighted'])

sns.set_style('white')

fs = 18
tfs = 14

ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
patches = ax.scatter(xs = df5['cx'], ys = df5['cy'], zs = residual, 
                     s = residual/10, c = residual, cmap = 'winter',
                     edgecolors = [0,0,0,0])

plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(dfout['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(dfout['weighted'].unique()[::3])
ax.view_init(azim = 45)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title("Difference of Actual and Fitted Number of Synapses", fontsize=fs)
plt.colorbar(patches)
plt.tight_layout()
plt.show()

3D clustering by synapse density



In [156]:

    
X = df2.loc[:,('cx','cy','cz')]
Xnew = X.copy()
for i, x in enumerate(xvals):
    Xnew.loc[Xnew['cx']==x, 'cx'] = i
for j, y in enumerate(yvals):
    Xnew.loc[Xnew['cy']==y, 'cy'] = j
for k, z in enumerate(zvals):
    Xnew.loc[Xnew['cz']==z, 'cz'] = k
Xnew.columns = ['i', 'j', 'k']
Xnew.head()



In [157]:

    
from sklearn.cluster import DBSCAN

# Compute DBSCAN
db = DBSCAN(eps=1, min_samples=2000).fit(Xnew, sample_weight=df2['weighted'])
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_

# Number of clusters in labels, ignoring noise if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

print('Estimated number of clusters: %d' % n_clusters_)









    



Estimated number of clusters: 142



In [160]:

    
# Plot result

sns.set_style('white')

fs = 18
tfs = 14

# Black removed and is used for noise instead.
ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
unique_labels = set(labels)
colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))

for k, col in zip(unique_labels, colors):
    if k == -1:
        # Black used for noise.
        col = 'k'

    class_member_mask = (labels == k)

    xyz = X[class_member_mask & core_samples_mask]
    ax.scatter(xyz['cx'], xyz['cy'], xyz['cz'], c=col, label=k)

plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(dfout['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(dfout['weighted'].unique()[::3])
ax.view_init(azim = 45)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X (um)', fontsize = fs, labelpad=20)
plt.ylabel('Y (um)', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Z (um)', fontsize = fs, labelpad=20)
plt.title('Estimated number of clusters: %d' % n_clusters_, fontsize=fs)
# plt.colorbar(patches)
plt.tight_layout()
plt.show()



In [163]:

    
# Plot result

sns.set_style('white')

fs = 18
tfs = 14

# Black removed and is used for noise instead.
ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
unique_labels = set(labels)
colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))

print "Cluster sizes:"
for k, col in zip(unique_labels, colors):
    if k == -1:
        # Black used for noise.
        col = 'k'

    class_member_mask = (labels == k)

    xyz = X[class_member_mask & core_samples_mask]
    if len(xyz)>1000:
        print np.sum(class_member_mask)
        ax.scatter(xyz['cx'], xyz['cy'], xyz['cz'], c=col)
        
        xyz = X[class_member_mask & ~core_samples_mask]
        ax.scatter(xyz['cx'], xyz['cy'], xyz['cz'], c=col, s=6)

plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(dfout['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(dfout['weighted'].unique()[::3])
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X (um)', fontsize = fs, labelpad=20)
plt.ylabel('Y (um)', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Z (um)', fontsize = fs, labelpad=20)
plt.title('Large clusters', fontsize=fs)
# plt.colorbar(patches)
plt.tight_layout()
plt.savefig('./figs/weighted_synapses/DBSCAN_large_clusters.png', format='png', dpi=300)
ax.view_init(azim = 45)
plt.savefig('./figs/weighted_synapses/DBSCAN_large_clusters_azim_35.png', format='png', dpi=300)
plt.show()









    



Cluster sizes:
5599
6074



In [16]:

    
fs = 18
tfs = 14

df2['label']  = labels

for k in [0, 83]:
    plt.figure(figsize=(16,8))
    print "Group", k
    
    for zi, z in enumerate(zvals):
        XY = pd.pivot_table(df2[df2['cz']==z], index='cy', columns='cx', values='weighted')
        XYlabel = pd.pivot_table(df2[df2['cz']==z], index='cy', columns='cx', values='label')
        XYmask = XYlabel != k
        
        plt.subplot(3,4,1+zi)
        sns.heatmap(XY, xticklabels=40, yticklabels=30, 
                    vmin=df2['weighted'].min(), vmax=df2['weighted'].max(), cbar=False, cmap=cm.get_cmap('winter'),
                    mask=XYmask)
        plt.gca().set_title('Z = '+str(z)+' um', fontsize=fs)
        plt.gca().set_xlabel('X (um)', fontsize=fs)
        plt.gca().set_ylabel('Y (um)', fontsize=fs)
        plt.tick_params(axis='both', which='major', labelsize=tfs)

    plt.tight_layout()
    plt.show()

Prepare data set



In [72]:

    
xval = df2['cx'].unique()
yval = df2['cy'].unique()
zval = df2['cz'].unique()
ncx = []
ncy = []
ncz = []
navg = []

output_x = [0, 0]
output_y = [0, 0]
output_z = [0, 0]

for i  in (xval):
    val_x = df2[(df2['cx'] == i)]['weighted'].mean()
    output_x = vstack((output_x, [i, val_x]))
for i  in (yval):
    val_y = df2[(df2['cy'] == i)]['weighted'].mean()
    output_y = vstack((output_y, [i, val_y]))
for i  in (zval):
    val_z = df2[(df2['cz'] == i)]['weighted'].mean()
    output_z = vstack((output_z, [i, val_z]))

output_x = pd.DataFrame(output_x) ; output_x.columns = ['cx', 'AVG_weighted']
output_y = pd.DataFrame(output_y) ; output_y.columns = ['cy', 'AVG_weighted']
output_z = pd.DataFrame(output_z) ; output_z.columns = ['cz', 'AVG_weighted']    
m, n = output_x.shape ; output_x = output_x[1:m]
m, n = output_y.shape ; output_y = output_y[1:m]
m, n = output_z.shape ; output_z = output_z[1:m]

Fitting X-dimension curve (using average synapses)



In [75]:

    
x_data = output_x['cx']
synapses = output_x['AVG_weighted']

def test_linear_model(x, a, b):
    return a * x + b 
popt = curve_fit(test_linear_model, x_data, synapses)
chi_squared = np.sum(((test_linear_model(x_data, popt[0][0], popt[0][1]) - synapses)) ** 2)
reduced_chi_squared = chi_squared / (len(x_data)  - len(popt))
print 'Testing linear_model'
print 'The degrees of freedom for this test is', len(x_data)  - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print 

model_1 = Model(test_linear_model)
result_1 = model_1.fit(synapses, x = x_data, a = popt[0][0], b = popt[0][1])

def test_quadratic_model(x, a, b, c):
    return a * x ** 2 + b * x + c
popt = curve_fit(test_quadratic_model, x_data, synapses)
chi_squared = np.sum(((test_quadratic_model(x_data, popt[0][0], popt[0][1], popt[0][2]) - synapses)) ** 2)
reduced_chi_squared = chi_squared / (len(x_data)  - len(popt))
print 'Testing quadratic_model'
print 'The degrees of freedom for this test is', len(x_data)  - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print 

model_2 = Model(test_quadratic_model)
result_2 = model_2.fit(synapses, x = x_data, a = popt[0][0], b = popt[0][1])

def test_cubic_model(x, a, b, c, d):
    return a * x ** 3 + b * x ** 2 + c * x + d 
popt = curve_fit(test_cubic_model, x_data, synapses)
chi_squared = np.sum(((test_cubic_model(x_data, popt[0][0], popt[0][1], popt[0][2], popt[0][3]) - synapses)) ** 2)
reduced_chi_squared = chi_squared / (len(x_data)  - len(popt))
print 'Testing cubic_model'
print 'The degrees of freedom for this test is', len(x_data)  - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print 

model_3 = Model(test_cubic_model)
result_3 = model_3.fit(synapses, x = x_data, a = popt[0][0], b = popt[0][1], c = popt[0][2], d = popt[0][3])

def test_quartic_model(x, a, b, c):
    return a * x ** 4 + b * x ** 2 + c   
popt = curve_fit(test_quartic_model, x_data, synapses)
chi_squared = np.sum(((test_quartic_model(x_data, popt[0][0], popt[0][1], popt[0][2]) - synapses)) ** 2)
reduced_chi_squared = chi_squared / (len(x_data)  - len(popt))
print 'Testing quartic_model'
print 'The degrees of freedom for this test is', len(x_data)  - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print 

model_4 = Model(test_quartic_model)
result_4 = model_4.fit(synapses, x = x_data, a = popt[0][0], b = popt[0][1], c = popt[0][2])


def test_logarithmic_model(x, a, b):
    return a * np.log(x) + b    
popt = curve_fit(test_logarithmic_model, x_data, synapses)
chi_squared = np.sum(((test_logarithmic_model(x_data, popt[0][0], popt[0][1]) - synapses)) ** 2)
reduced_chi_squared = chi_squared / (len(x_data)  - len(popt))
print 'Testing logarithmic_model'
print 'The degrees of freedom for this test is', len(x_data)  - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print 

model_5 = Model(test_logarithmic_model)
result_5 = model_5.fit(synapses, x = x_data, a = popt[0][0], b = popt[0][1])

def test_powerlaw_model(x, a, b):
    return a * (x ** b) 
popt = curve_fit(test_powerlaw_model, x_data, synapses)
chi_squared = np.sum(((test_powerlaw_model(x_data, popt[0][0], popt[0][1]) - synapses)) ** 2)
reduced_chi_squared = chi_squared / (len(x_data)  - len(popt))

print 'Testing powerlaw_model'
print 'The degrees of freedom for this test is', len(x_data)  - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print 

model_6 = Model(test_powerlaw_model)
result_6 = model_6.fit(synapses, x = x_data, a = popt[0][0], b = popt[0][1])









    



Testing linear_model
The degrees of freedom for this test is 82
The chi squared value is:  9078.14
The reduced chi squared value is:  110.71

 - Adding parameter "a"
 - Adding parameter "b"
Testing quadratic_model
The degrees of freedom for this test is 82
The chi squared value is:  4269.20
The reduced chi squared value is:  52.06

 - Adding parameter "a"
 - Adding parameter "b"
 - Adding parameter "c"
Testing cubic_model
The degrees of freedom for this test is 82
The chi squared value is:  4268.00
The reduced chi squared value is:  52.05

 - Adding parameter "a"
 - Adding parameter "b"
 - Adding parameter "c"
 - Adding parameter "d"
Testing quartic_model
The degrees of freedom for this test is 82
The chi squared value is:  4948.63
The reduced chi squared value is:  60.35

 - Adding parameter "a"
 - Adding parameter "b"
 - Adding parameter "c"
Testing logarithmic_model
The degrees of freedom for this test is 82
The chi squared value is:  10972.99
The reduced chi squared value is:  133.82

 - Adding parameter "a"
 - Adding parameter "b"
Testing powerlaw_model
The degrees of freedom for this test is 82
The chi squared value is:  10909.88
The reduced chi squared value is:  133.05

 - Adding parameter "a"
 - Adding parameter "b"

Fitting Results



In [74]:

    
print result_1.fit_report()
print result_2.fit_report()
print result_3.fit_report()
print result_4.fit_report()
print result_5.fit_report()
print result_6.fit_report()
print "According to AIC, Cubic Model is so far the best fitted model among 6 candidates."









    



[[Model]]
    Model(test_linear_model)
[[Fit Statistics]]
    # function evals   = 5
    # data points      = 84
    # variables        = 2
    chi-square         = 9078.142
    reduced chi-square = 110.709
[[Variables]]
    a:   0.00715128 +/- 0.001214 (16.98%) (init= 0.007151287)
    b:   226.743411 +/- 2.758986 (1.22%) (init= 226.7434)
[[Correlations]] (unreported correlations are <  0.100)
    C(a, b)                      = -0.909 

[[Model]]
    Model(test_quadratic_model)
[[Fit Statistics]]
    # function evals   = 5
    # data points      = 84
    # variables        = 2
    chi-square         = 7351.158
    reduced chi-square = 89.648
[[Variables]]
    a:   2.0410e-06 +/- 2.59e-07 (12.69%) (init= 2.040953e-06)
    b:   230.980750 +/- 1.689977 (0.73%) (init= 230.9807)
[[Correlations]] (unreported correlations are <  0.100)
    C(a, b)                      = -0.791 

[[Model]]
    Model(test_cubic_model)
[[Fit Statistics]]
    # function evals   = 8
    # data points      = 84
    # variables        = 4
    chi-square         = 4267.998
    reduced chi-square = 53.350
[[Variables]]
    a:  -1.8021e-10 +/- 1.20e-09 (666.24%) (init=-1.802261e-10)
    b:   1.0579e-05 +/- 7.51e-06 (70.98%) (init= 1.057941e-05)
    c:  -0.03397425 +/- 0.014091 (41.48%) (init=-0.03397445)
    d:   259.680361 +/- 7.653940 (2.95%) (init= 259.6805)
[[Correlations]] (unreported correlations are <  0.100)
    C(a, b)                      = -0.991 
    C(b, c)                      = -0.985 
    C(c, d)                      = -0.965 
    C(a, c)                      =  0.954 
    C(b, d)                      =  0.913 
    C(a, d)                      = -0.863 

[[Model]]
    Model(test_quartic_model)
[[Fit Statistics]]
    # function evals   = 6
    # data points      = 84
    # variables        = 3
    chi-square         = 4948.629
    reduced chi-square = 61.094
[[Variables]]
    a:   3.8727e-13 +/- 6.18e-14 (15.95%) (init= 3.872662e-13)
    b:  -2.7646e-06 +/- 7.96e-07 (28.78%) (init=-2.764555e-06)
    c:   239.306676 +/- 1.925907 (0.80%) (init= 239.3067)
[[Correlations]] (unreported correlations are <  0.100)
    C(a, b)                      = -0.963 
    C(b, c)                      = -0.818 
    C(a, c)                      =  0.689 

[[Model]]
    Model(test_logarithmic_model)
[[Fit Statistics]]
    # function evals   = 5
    # data points      = 84
    # variables        = 2
    chi-square         = 10972.994
    reduced chi-square = 133.817
[[Variables]]
    a:   8.62254211 +/- 2.260764 (26.22%) (init= 8.622542)
    b:   176.856960 +/- 17.00147 (9.61%) (init= 176.857)
[[Correlations]] (unreported correlations are <  0.100)
    C(a, b)                      = -0.997 

[[Model]]
    Model(test_powerlaw_model)
[[Fit Statistics]]
    # function evals   = 5
    # data points      = 84
    # variables        = 2
    chi-square         = 10909.881
    reduced chi-square = 133.047
[[Variables]]
    a:   182.761897 +/- 13.06483 (7.15%) (init= 182.7619)
    b:   0.03714308 +/- 0.009477 (25.52%) (init= 0.03714309)
[[Correlations]] (unreported correlations are <  0.100)
    C(a, b)                      = -0.997 

According to AIC, Cubic Model is so far the best fitted model among 6 candidates.

Plot the actual and fitted results



In [8]:

    
popt = curve_fit(test_cubic_model, x_data, synapses)
predi = test_cubic_model(x_data, popt[0][0], popt[0][1], popt[0][2], popt[0][3])

ax = plt.figure(figsize=(10, 10)).gca()
ax.scatter(x = x_data, y = predi, s = 50, label = "Fitted",
                      c = 'b', marker = 'x')
ax.scatter(x = x_data, y = output_x['AVG_weighted'], s = 50, label = "Actual",
                      c = 'c', marker = 'o' )
ax.set_title("Comparison of Acutal and Fitted Number of Mean Synapses",fontsize = 20)
ax.set_xlabel("X - Coordinate",fontsize = 20)
ax.set_ylabel("Mean Weighted Synapses",fontsize = 20)
plt.legend(fontsize = 15, markerscale = 2)
plt.show()

Residual Plot



In [9]:

    
Residual = (predi - output_x['AVG_weighted']) 
l = Line2D([0, 5000],[0, 0])                                    
ax = plt.figure(figsize=(8, 8)).gca()
ax.add_line(l) 
ax.scatter(x = x_data, y = Residual, s = 50, label = "Residual", c = 'r', marker = 'o')
ax.set_title("Residual Analysis",fontsize = 20)
ax.set_xlabel("X - Coordinate",fontsize = 20)
ax.set_ylabel("Residual",fontsize = 20)
plt.xlim(0, 4000)
plt.show()

print "From the residual plot, it's a little bit to see the normality."









    












    



From the residual plot, it's a little bit to see the normality.

Histogram of Error term



In [10]:

    
sns.set(style="white", palette="muted", color_codes=True)
ax = sns.distplot(Residual, color = "b")

ax.set_title("Histogram of Residual",fontsize = 20)
ax.set_xlabel("Deviation",fontsize = 20)
ax.set_ylabel("Frequency",fontsize = 20)

plt.tight_layout()
print "Looks like a normal distribution, suggesting that the error term is randomly distributed."









    



Looks like a normal distribution, suggesting that the error term is randomly distributed.

Fitting Y-dimension curve (using average synapses)



In [11]:

    
x_data = output_y['cy']
synapses = output_y['AVG_weighted']

def test_linear_model(x, a, b):
    return a * x + b 
popt = curve_fit(test_linear_model, x_data, synapses)
chi_squared = np.sum(((test_linear_model(x_data, popt[0][0], popt[0][1]) - synapses)) ** 2)
reduced_chi_squared = chi_squared / (len(x_data)  - len(popt))
print 'Testing linear_model'
print 'The degrees of freedom for this test is', len(x_data)  - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print 

model_1 = Model(test_linear_model)
result_1 = model_1.fit(synapses, x = x_data, a = popt[0][0], b = popt[0][1])

def test_quadratic_model(x, a, b):
    return a * x ** 2 + b 
popt = curve_fit(test_quadratic_model, x_data, synapses)
chi_squared = np.sum(((test_quadratic_model(x_data, popt[0][0], popt[0][1]) - synapses)) ** 2)
reduced_chi_squared = chi_squared / (len(x_data)  - len(popt))
print 'Testing quadratic_model'
print 'The degrees of freedom for this test is', len(x_data)  - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print 

model_2 = Model(test_quadratic_model)
result_2 = model_2.fit(synapses, x = x_data, a = popt[0][0], b = popt[0][1])

def test_cubic_model(x, a, b, c, d):
    return a * x ** 3 + b * x ** 2 + c * x + d 
popt = curve_fit(test_cubic_model, x_data, synapses)
chi_squared = np.sum(((test_cubic_model(x_data, popt[0][0], popt[0][1], popt[0][2], popt[0][3]) - synapses)) ** 2)
reduced_chi_squared = chi_squared / (len(x_data)  - len(popt))
print 'Testing cubic_model'
print 'The degrees of freedom for this test is', len(x_data)  - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print 

model_3 = Model(test_cubic_model)
result_3 = model_3.fit(synapses, x = x_data, a = popt[0][0], b = popt[0][1], c = popt[0][2], d = popt[0][3])

def test_quartic_model(x, a, b, c):
    return a * x ** 4 + b * x ** 2 + c   
popt = curve_fit(test_quartic_model, x_data, synapses)
chi_squared = np.sum(((test_quartic_model(x_data, popt[0][0], popt[0][1], popt[0][2]) - synapses)) ** 2)
reduced_chi_squared = chi_squared / (len(x_data)  - len(popt))
print 'Testing quartic_model'
print 'The degrees of freedom for this test is', len(x_data)  - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print 

model_4 = Model(test_quartic_model)
result_4 = model_4.fit(synapses, x = x_data, a = popt[0][0], b = popt[0][1], c = popt[0][2])


def test_logarithmic_model(x, a, b):
    return a * np.log(x) + b    
popt = curve_fit(test_logarithmic_model, x_data, synapses)
chi_squared = np.sum(((test_logarithmic_model(x_data, popt[0][0], popt[0][1]) - synapses)) ** 2)
reduced_chi_squared = chi_squared / (len(x_data)  - len(popt))
print 'Testing logarithmic_model'
print 'The degrees of freedom for this test is', len(x_data)  - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print 

model_5 = Model(test_logarithmic_model)
result_5 = model_5.fit(synapses, x = x_data, a = popt[0][0], b = popt[0][1])

def test_powerlaw_model(x, a, b):
    return a * (x ** b) 
popt = curve_fit(test_powerlaw_model, x_data, synapses)
chi_squared = np.sum(((test_powerlaw_model(x_data, popt[0][0], popt[0][1]) - synapses)) ** 2)
reduced_chi_squared = chi_squared / (len(x_data)  - len(popt))

print 'Testing powerlaw_model'
print 'The degrees of freedom for this test is', len(x_data)  - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print 

model_6 = Model(test_powerlaw_model)
result_6 = model_6.fit(synapses, x = x_data, a = popt[0][0], b = popt[0][1])









    



Testing linear_model
The degrees of freedom for this test is 36
The chi squared value is:  2738.39
The reduced chi squared value is:  76.07

Testing quadratic_model
The degrees of freedom for this test is 36
The chi squared value is:  2096.19
The reduced chi squared value is:  58.23

Testing cubic_model
The degrees of freedom for this test is 36
The chi squared value is:  1096.91
The reduced chi squared value is:  30.47

Testing quartic_model
The degrees of freedom for this test is 36
The chi squared value is:  1428.38
The reduced chi squared value is:  39.68

Testing logarithmic_model
The degrees of freedom for this test is 36
The chi squared value is:  3596.15
The reduced chi squared value is:  99.89

Testing powerlaw_model
The degrees of freedom for this test is 36
The chi squared value is:  3895.84
The reduced chi squared value is:  108.22

Fitting results



In [12]:

    
print result_1.fit_report()
print result_2.fit_report()
print result_3.fit_report()
print result_4.fit_report()
print result_5.fit_report()
print result_6.fit_report()
print "According to AIC, Cubic Model is so far the best fitted model among 6 candidates."









    



[[Model]]
    Model(test_linear_model)
[[Fit Statistics]]
    # function evals   = 5
    # data points      = 38
    # variables        = 2
    chi-square         = 2738.388
    reduced chi-square = 76.066
    Akaike info crit   = 168.601
    Bayesian info crit = 171.876
[[Variables]]
    a:  -0.03937029 +/- 0.003308 (8.40%) (init=-0.03937029)
    b:   323.825145 +/- 7.059117 (2.18%) (init= 323.8251)
[[Correlations]] (unreported correlations are <  0.100)
    C(a, b)                      = -0.980 

[[Model]]
    Model(test_quadratic_model)
[[Fit Statistics]]
    # function evals   = 5
    # data points      = 38
    # variables        = 2
    chi-square         = 2096.193
    reduced chi-square = 58.228
    Akaike info crit   = 158.446
    Bayesian info crit = 161.721
[[Variables]]
    a:  -9.6529e-06 +/- 6.89e-07 (7.14%) (init=-9.65285e-06)
    b:   285.471846 +/- 3.374218 (1.18%) (init= 285.4718)
[[Correlations]] (unreported correlations are <  0.100)
    C(a, b)                      = -0.930 

[[Model]]
    Model(test_cubic_model)
[[Fit Statistics]]
    # function evals   = 7
    # data points      = 38
    # variables        = 4
    chi-square         = 1096.911
    reduced chi-square = 32.262
    Akaike info crit   = 140.008
    Bayesian info crit = 146.558
[[Variables]]
    a:   3.5537e-08 +/- 1.51e-08 (42.36%) (init= 3.553717e-08)
    b:  -0.00026082 +/- 9.46e-05 (36.26%) (init=-0.0002608223)
    c:   0.57352631 +/- 0.193842 (33.80%) (init= 0.5735263)
    d:  -135.314417 +/- 129.4198 (95.64%) (init=-135.3144)
[[Correlations]] (unreported correlations are <  0.100)
    C(a, b)                      = -0.998 
    C(b, c)                      = -0.998 
    C(c, d)                      = -0.998 
    C(a, c)                      =  0.993 
    C(b, d)                      =  0.992 
    C(a, d)                      = -0.983 

[[Model]]
    Model(test_quartic_model)
[[Fit Statistics]]
    # function evals   = 6
    # data points      = 38
    # variables        = 3
    chi-square         = 1428.378
    reduced chi-square = 40.811
    Akaike info crit   = 146.940
    Bayesian info crit = 151.853
[[Variables]]
    a:  -1.4656e-12 +/- 3.62e-13 (24.72%) (init=-1.465563e-12)
    b:   4.3308e-06 +/- 3.50e-06 (80.93%) (init= 4.330752e-06)
    c:   256.910167 +/- 7.604752 (2.96%) (init= 256.9102)
[[Correlations]] (unreported correlations are <  0.100)
    C(a, b)                      = -0.986 
    C(b, c)                      = -0.973 
    C(a, c)                      =  0.928 

[[Model]]
    Model(test_logarithmic_model)
[[Fit Statistics]]
    # function evals   = 5
    # data points      = 38
    # variables        = 2
    chi-square         = 3596.150
    reduced chi-square = 99.893
    Akaike info crit   = 178.956
    Bayesian info crit = 182.231
[[Variables]]
    a:  -76.5738461 +/- 7.685946 (10.04%) (init=-76.57385)
    b:   825.274187 +/- 58.61542 (7.10%) (init= 825.2742)
[[Correlations]] (unreported correlations are <  0.100)
    C(a, b)                      = -1.000 

[[Model]]
    Model(test_powerlaw_model)
[[Fit Statistics]]
    # function evals   = 5
    # data points      = 38
    # variables        = 2
    chi-square         = 3895.840
    reduced chi-square = 108.218
    Akaike info crit   = 181.998
    Bayesian info crit = 185.273
[[Variables]]
    a:   2464.13786 +/- 611.8065 (24.83%) (init= 2464.138)
    b:  -0.30493646 +/- 0.032673 (10.71%) (init=-0.3049365)
[[Correlations]] (unreported correlations are <  0.100)
    C(a, b)                      = -1.000 

According to AIC, Cubic Model is so far the best fitted model among 6 candidates.

Plot the actual and fitted and data



In [13]:

    
popt = curve_fit(test_cubic_model, x_data, synapses)
predi_y = test_cubic_model(x_data, popt[0][0], popt[0][1], popt[0][2], popt[0][3])

ax = plt.figure(figsize=(10, 10)).gca()
ax.scatter(x = x_data, y = predi_y, s = 50, label = "Fitted",
                      c = 'b', marker = 'x')
ax.scatter(x = x_data, y = output_y['AVG_weighted'], s = 50, label = "Actual",
                      c = 'c', marker = 'o' )
ax.set_title("Comparison of Acutal and Fitted Number of Mean Synapses",fontsize = 20)
ax.set_xlabel("Y - Coordinate",fontsize = 20)
ax.set_ylabel("Mean Weighted Synapses",fontsize = 20)
plt.legend(fontsize = 15, markerscale = 2)
plt.show()

Residual Plot



In [14]:

    
Residual_y = (predi_y - output_y['AVG_weighted']) 
l = Line2D([0, 5000],[0, 0])                                    
ax = plt.figure(figsize=(8, 8)).gca()
ax.add_line(l) 
ax.scatter(x = x_data, y = Residual_y, s = 50, label = "Residual", c = 'r', marker = 'o')
ax.set_title("Residual Analysis",fontsize = 20)
ax.set_xlabel("Y - Coordinate",fontsize = 20)
ax.set_ylabel("Residual",fontsize = 20)
plt.xlim(min(output_y['cy']) - 100, max(output_y['cy']) + 100)
plt.show()

print "From the residual plot, it's kind of randomly distributed."









    












    



From the residual plot, it's kind of randomly distributed.

Histogram of Error term



In [15]:

    
sns.set(style="white", palette="muted", color_codes=True)
ax = sns.distplot(Residual_y, color = "b")

ax.set_title("Histogram of Residual",fontsize = 20)
ax.set_xlabel("Deviation",fontsize = 20)
ax.set_ylabel("Frequency",fontsize = 20)

plt.tight_layout()
print "Although a little bit skewed, it still looks like a normal distribution,"
print "suggesting that the error term is randomly distributed."









    



Although a little bit skewed, it still looks like a normal distribution,
suggesting that the error term is randomly distributed.

Fitting with complete polynomials



In [146]:

    
Xmeans = df2.groupby('cx').aggregate(np.mean)
x_point = np.array(Xmeans.index)
synapses = np.array(Xmeans['weighted'])

for deg in range(5):
    popt = P.polyfit(x_point, synapses, deg=deg)
    chi_squared = np.array(np.sum((P.polyval(x_point, popt) - synapses) ** 2))
    reduced_chi_squared = chi_squared / (len(x_point) - len(popt))
    print 'Testing polynomial model of degree =', deg
    print 'The degrees of freedom for this test is', len(x_point) - len(popt)
    print 'The chi squared value is: ', ("%.2f" % chi_squared)
    print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
    print 

popt = P.polyfit(np.log(x_point), synapses, deg=1)
chi_squared = np.sum((P.polyval(np.log(x_point), popt) - synapses) ** 2)
reduced_chi_squared = chi_squared / (len(x_point) - len(popt))
print 'Testing logarithmic model'
print 'The degrees of freedom for this test is', len(x_point) - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print 

popt = P.polyfit(np.log(x_point), np.log(synapses), deg=1)
chi_squared = np.sum((np.exp(P.polyval(np.log(x_point), popt)) - synapses) ** 2)
reduced_chi_squared = chi_squared / (len(x_point) - len(popt))
print 'Testing powerlaw model'
print 'The degrees of freedom for this test is', len(x_point) - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print 



print '1D polynomial of degree = 4 gives the best fit'









    



Testing polynomial model of degree = 0
The degrees of freedom for this test is 83
The chi squared value is:  12919.57
The reduced chi squared value is:  155.66

Testing polynomial model of degree = 1
The degrees of freedom for this test is 82
The chi squared value is:  9078.14
The reduced chi squared value is:  110.71

Testing polynomial model of degree = 2
The degrees of freedom for this test is 81
The chi squared value is:  4269.20
The reduced chi squared value is:  52.71

Testing polynomial model of degree = 3
The degrees of freedom for this test is 80
The chi squared value is:  4268.00
The reduced chi squared value is:  53.35

Testing polynomial model of degree = 4
The degrees of freedom for this test is 79
The chi squared value is:  2935.33
The reduced chi squared value is:  37.16

Testing logarithmic model
The degrees of freedom for this test is 82
The chi squared value is:  10972.99
The reduced chi squared value is:  133.82

Testing powerlaw model
The degrees of freedom for this test is 82
The chi squared value is:  10927.54
The reduced chi squared value is:  133.26

1D polynomial of degree = 4 gives the best fit



In [147]:

    
popt = P.polyfit(x_point, synapses, deg=2)
pred = P.polyval(x_point, popt)

Xmeans['pred'] = pred
Xmeans['resid'] = Xmeans['pred']-Xmeans['weighted']
Xmeans['resid'] = (Xmeans['resid']-Xmeans['resid'].mean())/Xmeans['resid'].std()

fs = 18
tfs = 14

plt.figure()
sns.regplot(xvals, Xmeans['weighted'], fit_reg=False, label='Actual')
plt.plot(xvals, Xmeans['pred'], label='Fitted')
plt.legend(loc=4, fontsize=tfs)
plt.xlabel('X (um)', fontsize=fs)
plt.ylabel('Mean Number of Synapses', fontsize=fs)
plt.tick_params(labelsize=tfs)
plt.title('Fitting in X', fontsize=fs)
plt.tight_layout()
plt.savefig('./figs/weighted_synapses/X_quadratic_fit.png', format='png', dpi=300)
plt.show()

plt.figure()
sns.regplot(xvals, Xmeans['resid'], fit_reg=False, label='Actual')
plt.plot(xvals, Xmeans['pred']-Xmeans['pred'], label='Fitted')
plt.xlabel('X (um)', fontsize=fs)
plt.ylabel('Scaled Residuals', fontsize=fs)
plt.tick_params(labelsize=tfs)
plt.title('Fitting in X', fontsize=fs)
plt.tight_layout()
plt.savefig('./figs/weighted_synapses/X_quadratic_fit_resid.png', format='png', dpi=300)
plt.show()

plt.figure()
sns.distplot(Xmeans['resid'])
plt.xlabel('Scaled Residuals', fontsize=fs)
plt.ylabel('Probability', fontsize=fs)
plt.tick_params(labelsize=tfs)
plt.title('Distribution of Residuals', fontsize=fs)
plt.tight_layout()
plt.savefig('./figs/weighted_synapses/X_quadratic_fit_resid_hist.png', format='png', dpi=300)
plt.show()



In [148]:

    
popt = P.polyfit(x_point, synapses, deg=4)
pred = P.polyval(x_point, popt)

Xmeans['pred'] = pred
Xmeans['resid'] = Xmeans['pred']-Xmeans['weighted']
Xmeans['resid'] = (Xmeans['resid']-Xmeans['resid'].mean())/Xmeans['resid'].std()

fs = 18
tfs = 14

plt.figure()
sns.regplot(xvals, Xmeans['weighted'], fit_reg=False, label='Actual')
plt.plot(xvals, Xmeans['pred'], label='Fitted')
plt.legend(loc=4, fontsize=tfs)
plt.xlabel('X (um)', fontsize=fs)
plt.ylabel('Mean Number of Synapses', fontsize=fs)
plt.tick_params(labelsize=tfs)
plt.title('Fitting in X', fontsize=fs)
plt.tight_layout()
plt.savefig('./figs/weighted_synapses/X_quartic_fit.png', format='png', dpi=300)
plt.show()

plt.figure()
sns.regplot(xvals, Xmeans['resid'], fit_reg=False, label='Actual')
plt.plot(xvals, Xmeans['pred']-Xmeans['pred'], label='Fitted')
plt.xlabel('X (um)', fontsize=fs)
plt.ylabel('Scaled Residuals', fontsize=fs)
plt.tick_params(labelsize=tfs)
plt.title('Fitting in X', fontsize=fs)
plt.tight_layout()
plt.savefig('./figs/weighted_synapses/X_quartic_fit_resid.png', format='png', dpi=300)
plt.show()

plt.figure()
sns.distplot(Xmeans['resid'])
plt.xlabel('Scaled Residuals', fontsize=fs)
plt.ylabel('Probability', fontsize=fs)
plt.tick_params(labelsize=tfs)
plt.title('Distribution of Residuals', fontsize=fs)
plt.tight_layout()
plt.savefig('./figs/weighted_synapses/X_quartic_fit_resid_hist.png', format='png', dpi=300)
plt.show()



In [166]:

    
Ymeans = df2.groupby('cy').aggregate(np.mean)
y_point = np.array(Ymeans.index)
synapses = np.array(Ymeans['weighted'])

for deg in range(5):
    popt = P.polyfit(y_point, synapses, deg=deg)
    chi_squared = np.array(np.sum((P.polyval(y_point, popt) - synapses) ** 2))
    reduced_chi_squared = chi_squared / (len(y_point) - len(popt))
    print 'Testing polynomial model of degree =', deg
    print 'The degrees of freedom for this test is', len(y_point) - len(popt)
    print 'The chi squared value is: ', ("%.2f" % chi_squared)
    print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
    print 

popt = P.polyfit(np.log(y_point), synapses, deg=1)
chi_squared = np.sum((P.polyval(np.log(y_point), popt) - synapses) ** 2)
reduced_chi_squared = chi_squared / (len(y_point) - len(popt))
print 'Testing logarithmic model'
print 'The degrees of freedom for this test is', len(y_point) - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print 

popt = P.polyfit(np.log(y_point), np.log(synapses), deg=1)
chi_squared = np.sum((np.exp(P.polyval(np.log(y_point), popt)) - synapses) ** 2)
reduced_chi_squared = chi_squared / (len(y_point) - len(popt))
print 'Testing powerlaw model'
print 'The degrees of freedom for this test is', len(y_point) - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print 



print '1D polynomial of degree = 4 gives the best fit'









    



Testing polynomial model of degree = 0
The degrees of freedom for this test is 37
The chi squared value is:  13511.35
The reduced chi squared value is:  365.17

Testing polynomial model of degree = 1
The degrees of freedom for this test is 36
The chi squared value is:  2738.39
The reduced chi squared value is:  76.07

Testing polynomial model of degree = 2
The degrees of freedom for this test is 35
The chi squared value is:  1276.74
The reduced chi squared value is:  36.48

Testing polynomial model of degree = 3
The degrees of freedom for this test is 34
The chi squared value is:  1096.91
The reduced chi squared value is:  32.26

Testing polynomial model of degree = 4
The degrees of freedom for this test is 33
The chi squared value is:  998.71
The reduced chi squared value is:  30.26

Testing logarithmic model
The degrees of freedom for this test is 36
The chi squared value is:  3596.15
The reduced chi squared value is:  99.89

Testing powerlaw model
The degrees of freedom for this test is 36
The chi squared value is:  3933.42
The reduced chi squared value is:  109.26

1D polynomial of degree = 4 gives the best fit



In [167]:

    
popt = P.polyfit(y_point, synapses, deg=2)
pred = P.polyval(y_point, popt)

Ymeans['pred'] = pred
Ymeans['resid'] = Ymeans['pred']-Ymeans['weighted']
Ymeans['resid'] = (Ymeans['resid']-Ymeans['resid'].mean())/Ymeans['resid'].std()

fs = 18
tfs = 14

plt.figure()
sns.regplot(y_point, Ymeans['weighted'], fit_reg=False, label='Actual')
plt.plot(y_point, Ymeans['pred'], label='Fitted')
plt.legend(fontsize=tfs)
plt.xlabel('Y (um)', fontsize=fs)
plt.ylabel('Mean Number of Synapses', fontsize=fs)
plt.tick_params(labelsize=tfs)
plt.title('Fitting in Y', fontsize=fs)
plt.tight_layout()
plt.savefig('./figs/weighted_synapses/Y_quadratic_fit.png', format='png', dpi=300)
plt.show()

plt.figure()
sns.regplot(y_point, Ymeans['resid'], fit_reg=False, label='Actual')
plt.plot(y_point, Ymeans['pred']-Ymeans['pred'], label='Fitted')
plt.xlabel('Y (um)', fontsize=fs)
plt.ylabel('Scaled Residuals', fontsize=fs)
plt.tick_params(labelsize=tfs)
plt.title('Fitting in Y', fontsize=fs)
plt.tight_layout()
plt.savefig('./figs/weighted_synapses/Y_quadratic_fit_resid.png', format='png', dpi=300)
plt.show()

plt.figure()
sns.distplot(Ymeans['resid'])
plt.xlabel('Scaled Residuals', fontsize=fs)
plt.ylabel('Probability', fontsize=fs)
plt.tick_params(labelsize=tfs)
plt.title('Distribution of Residuals', fontsize=fs)
plt.tight_layout()
plt.savefig('./figs/weighted_synapses/Y_quadratic_fit_resid_hist.png', format='png', dpi=300)
plt.show()



In [168]:

    
popt = P.polyfit(y_point, synapses, deg=4)
pred = P.polyval(y_point, popt)

Ymeans['pred'] = pred
Ymeans['resid'] = Ymeans['pred']-Ymeans['weighted']
Ymeans['resid'] = (Ymeans['resid']-Ymeans['resid'].mean())/Ymeans['resid'].std()

fs = 18
tfs = 14

plt.figure()
sns.regplot(y_point, Ymeans['weighted'], fit_reg=False, label='Actual')
plt.plot(y_point, Ymeans['pred'], label='Fitted')
plt.legend(fontsize=tfs)
plt.xlabel('Y (um)', fontsize=fs)
plt.ylabel('Mean Number of Synapses', fontsize=fs)
plt.tick_params(labelsize=tfs)
plt.title('Fitting in Y', fontsize=fs)
plt.tight_layout()
plt.savefig('./figs/weighted_synapses/Y_quartic_fit.png', format='png', dpi=300)
plt.show()

plt.figure()
sns.regplot(y_point, Ymeans['resid'], fit_reg=False, label='Actual')
plt.plot(y_point, Ymeans['pred']-Ymeans['pred'], label='Fitted')
plt.xlabel('Y (um)', fontsize=fs)
plt.ylabel('Scaled Residuals', fontsize=fs)
plt.tick_params(labelsize=tfs)
plt.title('Fitting in Y', fontsize=fs)
plt.tight_layout()
plt.savefig('./figs/weighted_synapses/Y_quartic_fit_resid.png', format='png', dpi=300)
plt.show()

plt.figure()
sns.distplot(Ymeans['resid'])
plt.xlabel('Scaled Residuals', fontsize=fs)
plt.ylabel('Probability', fontsize=fs)
plt.tick_params(labelsize=tfs)
plt.title('Distribution of Residuals', fontsize=fs)
plt.tight_layout()
plt.savefig('./figs/weighted_synapses/Y_quartic_fit_resid_hist.png', format='png', dpi=300)
plt.show()



In [134]:

    
Zmeans = df2.groupby('cz').aggregate(np.mean)
z_point = np.array(Zmeans.index)
synapses = np.array(Zmeans['weighted'])

for deg in range(5):
    popt = P.polyfit(z_point, synapses, deg=deg)
    chi_squared = np.array(np.sum((P.polyval(z_point, popt) - synapses) ** 2))
    reduced_chi_squared = chi_squared / (len(z_point) - len(popt))
    print 'Testing polynomial model of degree =', deg
    print 'The degrees of freedom for this test is', len(z_point) - len(popt)
    print 'The chi squared value is: ', ("%.2f" % chi_squared)
    print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
    print 

popt = P.polyfit(np.log(z_point), synapses, deg=1)
chi_squared = np.sum((P.polyval(np.log(z_point), popt) - synapses) ** 2)
reduced_chi_squared = chi_squared / (len(z_point) - len(popt))
print 'Testing logarithmic model'
print 'The degrees of freedom for this test is', len(y_point) - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print 

popt = P.polyfit(np.log(z_point), np.log(synapses), deg=1)
chi_squared = np.sum((np.exp(P.polyval(np.log(z_point), popt)) - synapses) ** 2)
reduced_chi_squared = chi_squared / (len(z_point) - len(popt))
print 'Testing powerlaw model'
print 'The degrees of freedom for this test is', len(y_point) - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print 



print '1D polynomial of degree = 2 gives the best fit'









    



Testing polynomial model of degree = 0
The degrees of freedom for this test is 10
The chi squared value is:  4628.56
The reduced chi squared value is:  462.86

Testing polynomial model of degree = 1
The degrees of freedom for this test is 9
The chi squared value is:  4568.59
The reduced chi squared value is:  507.62

Testing polynomial model of degree = 2
The degrees of freedom for this test is 8
The chi squared value is:  2161.29
The reduced chi squared value is:  270.16

Testing polynomial model of degree = 3
The degrees of freedom for this test is 7
The chi squared value is:  2098.25
The reduced chi squared value is:  299.75

Testing polynomial model of degree = 4
The degrees of freedom for this test is 6
The chi squared value is:  2096.07
The reduced chi squared value is:  349.34

Testing logarithmic model
The degrees of freedom for this test is 36
The chi squared value is:  4521.71
The reduced chi squared value is:  502.41

Testing powerlaw model
The degrees of freedom for this test is 36
The chi squared value is:  4534.34
The reduced chi squared value is:  503.82

1D polynomial of degree = 4 gives the best fit



In [139]:

    
popt = P.polyfit(z_point, synapses, deg=2)
pred = P.polyval(z_point, popt)

Zmeans['pred'] = pred
Zmeans['resid'] = Zmeans['pred']-Zmeans['weighted']
Zmeans['resid'] = (Zmeans['resid']-Zmeans['resid'].mean())/Zmeans['resid'].std()

fs = 18
tfs = 14

plt.figure()
sns.regplot(z_point, Zmeans['weighted'], fit_reg=False, label='Actual')
plt.plot(z_point, Zmeans['pred'], label='Fitted')
plt.legend(fontsize=tfs)
plt.xlabel('Z (um)', fontsize=fs)
plt.ylabel('Mean Number of Synapses', fontsize=fs)
plt.tick_params(labelsize=tfs)
plt.title('Fitting in Z', fontsize=fs)
plt.tight_layout()
plt.savefig('./figs/weighted_synapses/Z_quadratic_fit.png', format='png', dpi=300)
plt.show()

plt.figure()
sns.regplot(z_point, Zmeans['resid'], fit_reg=False, label='Actual')
plt.plot(z_point, Zmeans['pred']-Zmeans['pred'], label='Fitted')
plt.xlabel('Z (um)', fontsize=fs)
plt.ylabel('Scaled Residuals', fontsize=fs)
plt.tick_params(labelsize=tfs)
plt.title('Fitting in Z', fontsize=fs)
plt.tight_layout()
plt.savefig('./figs/weighted_synapses/Z_quadratic_fit_resid.png', format='png', dpi=300)
plt.show()

plt.figure()
sns.distplot(Zmeans['resid'])
plt.xlabel('Scaled Residuals', fontsize=fs)
plt.ylabel('Probability', fontsize=fs)
plt.tick_params(labelsize=tfs)
plt.title('Distribution of Residuals', fontsize=fs)
plt.tight_layout()
plt.savefig('./figs/weighted_synapses/Z_quadratic_fit_resid_hist.png', format='png', dpi=300)
plt.show()

	i	j	k
6292	0	0	0
6293	0	0	1
6294	0	0	2
6295	0	0	3
6296	0	0	4

	cx	cy	cz	unmasked	synapses	weighted
6292	448	1369	55	126357	153	238.063772
6293	448	1369	166	139932	207	290.840237
6294	448	1369	277	150269	194	253.824488
6295	448	1369	388	138071	159	226.410122
6296	448	1369	499	150842	258	336.278119

	cx	cy	weighted
0	448	1369	250.321598
1	448	1408	223.463253
2	448	1447	259.299478
3	448	1486	241.491077
4	448	1525	279.953094

	cx	cy	weighted
0	448	1369	2588.785248
1	448	1408	2343.006049
2	448	1447	2759.237726
3	448	1486	2519.679015
4	448	1525	2931.327981